Setup

This project is about same-sex sexual behavior (SSB) in mammals. It has previously been demonstrated that SSB has developed numerous times across the mammal phylogeny (Gómez et al. 2023), but the underlying patterns behind where and why SSB evolves remains largely unknown. This work serves to compare existing phylogenetic data for SSB’s presence with that of other covariates, with the purpose of investigating potential ties between SSB and mammal sociality, maternal investment, confusion, diversification, and extinction. We approached our analysis of SSB under the traditional assumptions of the field, namely that SSB, as an energy intensive but reproductively unfruitful, is a largely nonadaptive or maladaptive trait. To test this, we broke our analysis into three primary sections: maternal investment, designed to investigate the energy costs involved in engaging in nonreproductive sexual behavior, sociality, designed to investigate at a deeper level previously established ties between social behavior and SSB, and confusion and maladaptation, designed test theories that posit SSB existing as rendering no fitness benefit. ANALYSES METHODS DETAILS HERE. The details of each category of study and individual hypotheses can be found below:

Making things into functions so we don’t have to rewrite so much code

do_mcmc <- function(f) {
  # File names
  analysis_folder <- paste0(ANALYSIS, "_", SSBTYPE, "_", HYPOTHESIS, "/")
  ifelse(!dir.exists(paste0("../output/", analysis_folder)), dir.create(paste0("../output/", analysis_folder)), FALSE)
  analysis_name <- paste0(ANALYSIS, "_", SSBTYPE, "_", HYPOTHESIS, "_", tree_number, "_", NUMBER)
  print(paste0("Performing analysis: ", analysis_name), quote = F)
  out_file <- paste0("../output/", analysis_folder, analysis_name, ".log.csv")
  summary_file <- paste0("../output/", analysis_folder, analysis_name, ".sum.txt")

  # Read in data
  columns <- c("Species", all.vars(f))
  data_subset <- data[, columns]
  # Removes incomplete cases
  data_subset <- data_subset[complete.cases(data_subset), ]

  # Running MCMCglmm
  MCMCanalysis <- MCMCglmm::MCMCglmm(f,
    random = ~Species,
    family = "categorical",
    ginverse = list(Species = inv.phylo),
    prior = prior,
    data = data_subset,
    nitt = NITT,
    burnin = BURNIN,
    thin = THIN,
    verbose = TRUE
  )

  write.csv(MCMCanalysis$Sol, out_file, row.names = FALSE, quote = FALSE)
  out <- capture.output(
    cat(paste0("ESS: ", ess(MCMCanalysis$Sol), "\n")),
    print(summary(MCMCanalysis$Sol))
  )
  write(out, summary_file)
}
do_pagel <- function() {
  # File names
  analysis_folder <- paste0(ANALYSIS, "_", SSBTYPE, "_", VARIABLE, "/")
  ifelse(!dir.exists(paste0("../output/", analysis_folder)), dir.create(paste0("../output/", analysis_folder)), FALSE)
  analysis_name <- paste0(ANALYSIS, "_", SSBTYPE, "_", VARIABLE, "_", tree_number, "_", NUMBER)
  print(paste0("Performing analysis: ", analysis_name), quote = F)
  summary_file <- paste0("../output/", analysis_folder, analysis_name, ".sum.txt")

  # Read in data
  columns <- c("Species", SSBTYPE, VARIABLE)
  data_subset <- data[, columns]
  # Removes incomplete cases
  data_subset <- data_subset[complete.cases(data_subset), ]
  phy_subset <- ape::keep.tip(phy, data_subset$Species)

  # Assign species names
  SSB <- setNames(data_subset[[SSBTYPE]], data_subset$Species)
  VAR <- setNames(data_subset[[VARIABLE]], data_subset$Species)

  write(paste0("Pagel's Directional Test for ", SSBTYPE, " and ", VARIABLE), summary_file)
  write("--------------------", summary_file, append = TRUE)

  # Fit models where SSB and NAC are totally independent or dependent
  fit_SSB_VAR <- phytools::fitPagel(phy_subset, SSB, VAR)
  out1 <- capture.output(print(fit_SSB_VAR))
  write(paste0("Both Independent or Dependent"), summary_file, append = TRUE)
  write(out1, summary_file, append = TRUE)
  write("--------------------", summary_file, append = TRUE)

  # Fit model where SSB depends on NAC
  fit_SSB <- phytools::fitPagel(phy_subset, SSB, VAR, dep.var = "x")
  out2 <- capture.output(print(fit_SSB))
  write(paste0("Dependent ", SSBTYPE), summary_file, append = TRUE)
  write(out2, summary_file, append = TRUE)
  write("--------------------", summary_file, append = TRUE)

  # Fit model where NAC depends on SSB
  fit_VAR <- phytools::fitPagel(phy_subset, SSB, VAR, dep.var = "y")
  out3 <- capture.output(print(fit_VAR))
  write(paste0("Dependent ", VARIABLE), summary_file, append = TRUE)
  write(out3, summary_file, append = TRUE)
  write("--------------------", summary_file, append = TRUE)

  # Comparing the goodness of all 4 models using AIC
  aic <- setNames(
    c(
      fit_SSB_VAR$independent.AIC,
      fit_SSB$dependent.AIC,
      fit_VAR$dependent.AIC,
      fit_SSB_VAR$dependent.AIC
    ),
    c("independent", "dependent SSB", paste0("dependent ", VARIABLE), paste0("dependent SSB & ", VARIABLE))
  )
  out4 <- capture.output(print(aic))
  out5 <- capture.output(print(aic.w(aic)))
  write(paste0("AIC"), summary_file, append = TRUE)
  write(out4, summary_file, append = TRUE)
  write(paste0("Weights"), summary_file, append = TRUE)
  write(out5, summary_file, append = TRUE)
}
process_mcmc <- function(ssbtype, hypothesis) {
  files <- Sys.glob(paste0("../output/MCMCGLMM_", ssbtype, "_", hypothesis, "/*.log.csv"))
  results <- data.frame(matrix(NA, nrow = 0, ncol = 10))
  for (file in files) {
    name_str <- strsplit(strsplit(basename(file), "\\.")[[1]][1], "_")[[1]]
    treetype <- if (name_str[4] == "TREEMCC") {
      "TREEMCC"
    } else {
      "RANDOM"
    }
    number <- name_str[5]
    log <- read.csv(file)
    log$treetype <- treetype
    log$number <- number
    results <- rbind(results, log)
  }
  colnames(results)[1] <- "Intercept"
  params <- colnames(results)[1:(ncol(results) - 2)]

  plot_list <- list()
  ind <- 1
  for (param in params) {
    hpd95 <- hdi(results[[param]], credMass = .95)
    hpd90 <- hdi(results[[param]], credMass = .9)
    hpd80 <- hdi(results[[param]], credMass = .8)
    hpd_vals <- sort(c(hpd80[[1]], hpd90[[1]], hpd95[[1]], hpd95[[2]], hpd90[[2]], hpd80[[2]], 0))
    if (hpd_vals[1] == 0) {
      sig <- 4
      hpdsig <- c(1, 1, 1, 1, 1, 1)
    }
    if (hpd_vals[2] == 0) {
      sig <- 3
      hpdsig <- c(0, 1, 1, 1, 1, 1)
    }
    if (hpd_vals[3] == 0) {
      sig <- 2
      hpdsig <- c(0, 0, 1, 1, 1, 1)
    }
    if (hpd_vals[4] == 0) {
      sig <- 1
      hpdsig <- c(0, 0, 0, 0, 0, 0)
    }
    if (hpd_vals[5] == 0) {
      sig <- 2
      hpdsig <- c(1, 1, 1, 1, 0, 0)
    }
    if (hpd_vals[6] == 0) {
      sig <- 3
      hpdsig <- c(1, 1, 1, 1, 1, 0)
    }
    if (hpd_vals[7] == 0) {
      sig <- 4
      hpdsig <- c(1, 1, 1, 1, 1, 1)
    }
    markers <- c("", "*", "**", "***")
    colors <- c("black", "gold", "darkorange", "red")
    color <- colors[sig]
    line_colors <- c()
    for (i in 1:6) {
      line_colors[i] <- if (hpdsig[i] == 0) {
        "black"
      } else {
        color
      }
    }

    plot <- ggplot(results) +
      geom_vline(xintercept = 0) +
      geom_density(aes_string(x = param, y = "..scaled..", group = "treetype", lty = "treetype"), color = color) +
      geom_vline(xintercept = hpd80[[1]], lty = "dotted", color = line_colors[3]) +
      geom_vline(xintercept = hpd90[[1]], lty = "dotted", color = line_colors[2]) +
      geom_vline(xintercept = hpd95[[1]], lty = "dotted", color = line_colors[1]) +
      geom_vline(xintercept = hpd95[[2]], lty = "dotted", color = line_colors[6]) +
      geom_vline(xintercept = hpd90[[2]], lty = "dotted", color = line_colors[5]) +
      geom_vline(xintercept = hpd80[[2]], lty = "dotted", color = line_colors[4]) +
      ggtitle(paste0(param, markers[sig])) +
      theme_bw() +
      theme(
        plot.title = element_text(hjust = 0.5),
        axis.title = element_blank(),
        legend.position = "none",
        aspect.ratio = 1
      )
    plot_list[[ind]] <- plot
    ind <- ind + 1
  }
  big_plot <- wrap_plots(plot_list, nrow = 1)
  ggsave(paste0("../figures/", ssbtype, "_", hypothesis, ".png"), big_plot, dpi = 600, width = 3 * length(plot_list), height = 3)
}
process_pagel <- function(ssbtype, variable) {
  files <- Sys.glob(paste0("../output/PAGEL_", ssbtype, "_", variable, "/*.sum.txt"))
  names <- c("Independent", paste0("Dependent ", ssbtype), paste0("Dependent ", variable), "Interdependent")

  file_contents <- readLines(paste0("../output/PAGEL_", ssbtype, "_", variable, "/PAGEL_", ssbtype, "_", variable, "_TREEMCC_001.sum.txt"))
  aic <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents) - 3)], " +")[[1]][2:5]), ncol = 4))
  weight <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents))], " +")[[1]][2:5]), ncol = 4))
  results <- rbind(aic, weight)
  colnames(results) <- names

  aics <- data.frame(matrix(NA, nrow = 0, ncol = 4))
  weights <- data.frame(matrix(NA, nrow = 0, ncol = 4))
  colnames(aics) <- names
  colnames(weights) <- names

  for (file in files) {
    name_str <- strsplit(strsplit(basename(file), "\\.")[[1]][1], "_")[[1]]
    treetype <- if (name_str[4] == "TREEMCC") {
      "TREEMCC"
    } else {
      "RANDOM"
    }
    number <- name_str[5]
    if (treetype == "RANDOM") {
      file_contents <- readLines(file)
      aic <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents) - 3)], " +")[[1]][2:5]), ncol = 4))
      weight <- data.frame(matrix(as.numeric(strsplit(file_contents[(length(file_contents))], " +")[[1]][2:5]), ncol = 4))
      colnames(aic) <- names
      colnames(weight) <- names
      aics <- rbind(aics, aic)
      weights <- rbind(weights, weight)
    }
  }

  aic_means <- colMeans(aics)
  aic_weights <- aic.w(aic_means)
  results <- rbind(results, aic_means, aic_weights)
  rownames(results) <- c("MCC AIC", "MCC Weights", "Mean AIC", "Mean Weights")
  return(results)
}

Dataset

DESCRIPTION OF DATASET HERE

Data Cleaning:

The species list used in this analysis was updated to fit with Phylacine 1.2 from Gómez et al.’s subset 4 and 1 for the character trait and diversification-extinction analyses, respectively. Discrepancies between the two data sets were evaluated using the American Society of Mammalogists’ Mammal Diversity Database, with the most up to date taxonomy being applied (Supplemental table for name changes to 1700). During those discrepancies, if Phylacine 1.2’s taxonomy did not reflect that of the Mammal Diversity Database, note of it was added to the R analysis so that it could be automatically changed during phylogeny reading, but still maintained as the most accurate form in our dataset (Supplemental table for dataset to phylogeny changes). In cases of species that had been duplicated or are now included under other taxa, if SSB was present for said duplicate or inclusion, it was listed as present for the final dataset.

NLV = No Longer Valid. Includes all species removed from analysis.

## [1] Reading in data...
# This code chunk reads our giant list of trees and combines them to make one "best" tree
# We want to see this code (echo=TRUE), but we don't want to run it every time we do our analyses (it takes a while)
ANALYSIS <- "MCMCTREE"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "MCCTREE") {
  print("Generating MCC tree...", quote = F)
  trees <- ape::read.nexus("../data/phylogeny.nex")
  phy_mcc <- phangorn::maxCladeCred(trees) # Generates a maximum clade credibility (best) tree from the 1000 trees
  cat(paste0("Is Ultrametric? ", ape::is.ultrametric(phy_mcc)))
  cat(paste0("Is Bifurcating? ", castor::is_bifurcating(phy_mcc)))
  n_taxa <- length(phy_mcc$tip.label) # Gets the number of species from looking at the tip labels
  phy_mcc$node.label <- c((n_taxa + 1):(n_taxa + phy_mcc$Nnode)) # Gives number labels to nodes
  ape::write.tree(phy_mcc, "../data/mcc_tree.txt") # Saves this tree to a file called "mcc_tree.txt"
}

These species are in the dataset but not in the tree.

## [1] Getting tree subset...
## [1] Total species: 380

Here is the phylogeny that we are using, only including taxa which are in the species dataset.

##       HYP
## SSB     AM IUCN MGS  MI RDM SDT SOC
##   FSSB 110  110 110 110 109 110 110
##   MSSB 110  110 110 110 110 110 110
##   SSB  110  110 110 110 110 110 110
## [1] Mean ESS for non-IUCN: 2870.01768569985
##       VAR
## SSB    NAC RDM SDT SPI TSP
##   FSSB 101 101 101 101 101
##   MSSB 101 101 101 101 101
##   SSB  101 101 101 101 101
## [1] Missing MCMCglmm
## [1] FSSB RDM TREEMCC 002
## [1] Missing Pagel
## [1] Missing SSE
## [1] Artiodactyla_Terrestrial,Eulipotyphla,Rodentia_Hystricomorpha,Rodentia_Supramyomorpha_Myomorphi,Xenarthra

Maternal Investment

Hypothesis:

Species with greater maternal investment need to allocate more resources to reproductively fruitful mating, and are thus less likely to exhibit SSB.

Covariates:

↑ in factor associated with ↑ in SSB: Alloparental Care (NAC), Progeny Count (PC), Paternal Investment (SPI)

↑ in factor associated with ↓ in SSB: Gestation Period (GP), Wean Age (FWA), Birth Mass to Maternal Mass Ratio (MR), Typically Single Progeny (TSP)


MCMCGLMM

SSB

# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "MI")

FSSB

# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "MI")

MSSB

# Runs MCMCglmm for all our Maternal investment variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MI"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ NAC + PC + SPI + GP + FWA + MR + TSP + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MI" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "MI")


Pagel’s Directional Test

NAC

SSB vs. NAC

ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "SSB") {
  do_pagel()
}
##               Independent Dependent SSB Dependent NAC Interdependent
## MCC AIC      876.07790000  876.51920000   873.6340000    869.7375000
## MCC Weights    0.03447264    0.02764744     0.1169947      0.8208852
## Mean AIC     872.02224300  870.92616900   868.6963980    866.3957480
## Mean Weights   0.04053752    0.07012399     0.2138234      0.6755151

FSSB vs. NAC

ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "FSSB") {
  do_pagel()
}
##               Independent Dependent FSSB Dependent NAC Interdependent
## MCC AIC      8.175612e+02   8.065332e+02   797.0561000    796.5144000
## MCC Weights  1.520000e-05   3.772340e-03     0.4310614      0.5651511
## Mean AIC     8.108147e+02   8.008762e+02   790.8805830    789.3284680
## Mean Weights 1.475650e-05   2.123688e-03     0.3144962      0.6833653

MSSB vs. NAC

ANALYSIS <- "PAGEL"
VARIABLE <- "NAC"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "NAC" & SSBTYPE == "MSSB") {
  do_pagel()
}
##               Independent Dependent MSSB Dependent NAC Interdependent
## MCC AIC      872.76520000    870.7479000   868.7923000    872.2087000
## MCC Weights    0.08095864      0.2219797     0.5901332      0.1069285
## Mean AIC     868.95487800    865.8295970   863.1962920    864.5043430
## Mean Weights   0.03046083      0.1453409     0.5422541      0.2819442

SPI

SSB vs. SPI

ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "SSB") {
  do_pagel()
}
##              Independent Dependent SSB Dependent SPI Interdependent
## MCC AIC      771.4071000   774.2612000   773.9583000   777.24560000
## MCC Weights    0.6356291     0.1525577     0.1775058     0.03430735
## Mean AIC     773.1099100   776.1113720   776.7810180   779.56274000
## Mean Weights   0.7031401     0.1567771     0.1121685     0.02791436

FSSB vs. SPI

ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "FSSB") {
  do_pagel()
}
##              Independent Dependent FSSB Dependent SPI Interdependent
## MCC AIC      712.8904000    714.4053000  716.82640000   718.39120000
## MCC Weights    0.5979076      0.2803356    0.08354970     0.03820703
## Mean AIC     711.9023480    714.2993070  715.70249700   717.86099400
## Mean Weights   0.6657625      0.2008289    0.09956975     0.03383886

MSSB vs. SPI

ANALYSIS <- "PAGEL"
VARIABLE <- "SPI"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SPI" & SSBTYPE == "MSSB") {
  do_pagel()
}
##              Independent Dependent MSSB Dependent SPI Interdependent
## MCC AIC      768.0943000    768.4744000  777.47240000   771.75590000
## MCC Weights    0.5009084      0.4141977    0.00460613     0.08028777
## Mean AIC     769.8637060    771.0115040  773.59023100   774.91761300
## Mean Weights   0.5560522      0.3132380    0.08628016     0.04442972

TSP

SSB vs. TSP

ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "SSB") {
  do_pagel()
}
##               Independent Dependent SSB Dependent TSP Interdependent
## MCC AIC      7.111246e+02   693.3173000  7.045169e+02    691.6653000
## MCC Weights  4.133000e-05     0.3041366  1.124870e-03      0.6946972
## Mean AIC     7.063789e+02   692.1758110  7.006746e+02    689.1878340
## Mean Weights 1.506138e-04     0.1828179  2.609362e-03      0.8144222

FSSB vs. TSP

ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "FSSB") {
  do_pagel()
}
##               Independent Dependent FSSB Dependent TSP Interdependent
## MCC AIC      6.526080e+02    641.4890000  656.28110000    640.4984000
## MCC Weights  1.455620e-03      0.3780117    0.00023197      0.6203007
## Mean AIC     6.451634e+02    637.0395640  648.46426000    636.6502010
## Mean Weights 7.701537e-03      0.4473382    0.00147841      0.5434819

MSSB vs. TSP

ANALYSIS <- "PAGEL"
VARIABLE <- "TSP"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "TSP" & SSBTYPE == "MSSB") {
  do_pagel()
}
##               Independent Dependent MSSB Dependent TSP Interdependent
## MCC AIC      7.078119e+02    690.7188000  7.007736e+02    690.5031000
## MCC Weights  9.159000e-05      0.4715532  3.091450e-03      0.5252637
## Mean AIC     7.030473e+02    689.4114070  6.973026e+02    685.9594980
## Mean Weights 1.647985e-04      0.1506406  2.913407e-03      0.8462812

Sociality

Hypothesis 1:

Social species whose group structure involves increased time spent exclusively with the same sex will route more sexual behavior to same-sex interactions.

Tested covariates:

↑ in group type associated with ↑ in SSB: MBG, HBG, FEG, SSG

↑ in group type associated with ↓ in SSB: SOL, MOP, HAR, MMG, HVG


MCMCGLMM

SSB

# Runs MCMCglmm for all our Sociality variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MGS"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ MGS + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MGS" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "MGS")

FSSB

# Runs MCMCglmm for all our Sociality variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MGS"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ MGS + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MGS" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "MGS")

MSSB

# Runs MCMCglmm for all our Sociality variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "MGS"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ MGS + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "MGS" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "MGS")

Hypothesis 2:

Territorial animals are more likely to exhibit SSB as a means of mitigating the risk of fatal conspecific violence.

Tested covariates:

↑ in factor associated with ↑ in SSB: Territoriality

Note: The Nature paper that plotted out the phylogeny of SSB demonstrated that SSB often evolves subsequently to male adulticide, which they attributed to SSB potentially acting as a means of mitigation that allows for aggression release without necessitating more violence. This hypothesis is trying to build on that, which is why it doesn’t match the “Darwinian Paradox” approach of the others.


MCMCGLMM

SSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SDT"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ SDT + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SDT" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "SDT")

FSSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SDT"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ SDT + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SDT" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "SDT")

MSSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SDT"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ SDT + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SDT" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "SDT")


Pagel’s Directional Test

SSB vs. SDT

ANALYSIS <- "PAGEL"
VARIABLE <- "SDT"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SDT" & SSBTYPE == "SSB") {
  do_pagel()
}
##               Independent Dependent SSB Dependent SDT Interdependent
## MCC AIC      8.841425e+02  8.693098e+02  8.833553e+02    857.2350000
## MCC Weights  1.430000e-06  2.382060e-03  2.120000e-06      0.9976144
## Mean AIC     8.922041e+02  8.757033e+02  8.948902e+02    860.2998310
## Mean Weights 1.180017e-07  4.518304e-04  3.080311e-08      0.9995480

FSSB vs. SDT

ANALYSIS <- "PAGEL"
VARIABLE <- "SDT"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SDT" & SSBTYPE == "FSSB") {
  do_pagel()
}
##               Independent Dependent FSSB Dependent SDT Interdependent
## MCC AIC      8.267492e+02   8.204581e+02  8.268040e+02    807.3492000
## MCC Weights  6.119000e-05   1.421620e-03  5.954000e-05      0.9984576
## Mean AIC     8.264918e+02   8.191242e+02  8.272329e+02    805.7894920
## Mean Weights 3.191423e-05   1.270097e-03  2.203222e-05      0.9986760

MSSB vs. SDT

ANALYSIS <- "PAGEL"
VARIABLE <- "SDT"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "SDT" & SSBTYPE == "MSSB") {
  do_pagel()
}
##               Independent Dependent MSSB Dependent SDT Interdependent
## MCC AIC      8.970788e+02   8.841207e+02  9.010894e+02    865.3950000
## MCC Weights  1.300000e-07   8.585000e-05  2.000000e-08      0.9999140
## Mean AIC     8.926114e+02   8.806535e+02  8.913589e+02    866.5156130
## Mean Weights 2.152773e-06   8.503977e-04  4.026859e-06      0.9991434

Hypothesis 3:

Solitary species that demonstrate occasional grouping are more likely to have same-sex interactions than purely solitary species, and are thus more likely to exhibit SSB. Monogamous pairs and small family units are not considered groups on their own.

Tested covariates:

↑ in factor associated with ↑ in SSB: Occasional Groupings in Solitary Species.

Note: The same Nature paper already established that social species are associated with ↑ SSB, so this extends that to a deeper analysis of solitary species.


MCMCGLMM

SSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SOC"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ SOC + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SOC" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "SOC")

FSSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SOC"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ SOC + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SOC" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "SOC")

MSSB

# Runs MCMCglmm for all our Territoriality variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "SOC"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ SOC + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "SOC" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "SOC")

Confusion & Maladaptation

Hypothesis 1:

Species with notably dimorphic external morphology (excluding sex organs) are less likely to confuse members of the same and opposite sex, and are therefore less likely to exhibit SSB.

Tested covariates:

↑ in factor associated with ↓ in SSB: Radically Dimorphic Morphology


MCMCGLMM

SSB

# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "RDM")

FSSB

# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "RDM")

MSSB

# Runs MCMCglmm for all our Confusion variables. Individual binary variables each get their own suite of pagels below.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "RDM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ RDM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "RDM" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "RDM")


Pagel’s Directional Test

SSB vs. RDM

ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "SSB") {
  do_pagel()
}
##               Independent Dependent SSB Dependent RDM Interdependent
## MCC AIC      8.205488e+02   804.8648000  8.146288e+02    804.1058000
## MCC Weights  1.590900e-04     0.4049364  3.070120e-03      0.5918344
## Mean AIC     8.251443e+02   810.6359730  8.185348e+02    807.3721540
## Mean Weights 1.153017e-04     0.1630363  3.141015e-03      0.8337074

FSSB vs. RDM

ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "FSSB") {
  do_pagel()
}
##               Independent Dependent FSSB Dependent RDM Interdependent
## MCC AIC      7.620322e+02   740.71090000  7.520281e+02    732.0076000
## MCC Weights  3.000000e-07     0.01272105  4.436000e-05      0.9872343
## Mean AIC     7.639368e+02   745.02648900  7.536751e+02    742.1824900
## Mean Weights 1.517561e-05     0.19384648  2.566996e-03      0.8035714

MSSB vs. RDM

ANALYSIS <- "PAGEL"
VARIABLE <- "RDM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
if (ANALYSIS == "PAGEL" & VARIABLE == "RDM" & SSBTYPE == "MSSB") {
  do_pagel()
}
##               Independent Dependent MSSB Dependent RDM Interdependent
## MCC AIC      8.172361e+02    802.7140000  810.06420000    804.6963000
## MCC Weights  5.027100e-04      0.7157185    0.01814122      0.2656375
## Mean AIC     8.218154e+02    808.4522860  813.59788700    808.0085470
## Mean Weights 5.390539e-04      0.4299256    0.03281257      0.5367228

Hypothesis 2:

Species with higher ages of maturity are more likely to observe SSB while young and try to replicate same-sex sexual interactions upon reaching adulthood.

Tested covariates:

↑ in factor associated with ↑ in SSB: Male Age of Maturity, Female Age of Maturity.


MCMCGLMM

SSB

# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "AM")

FSSB

# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "AM")

MSSB

# Runs MCMCglmm for all our Maturity variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "AM"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ FAM + MAM + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "AM" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "AM")

Hypothesis 3:

SSB is an energy intensive but reproductively unfruitful endeavor, and will therefore be associated with reduced species success.

Tested covariates:

↑ in SSB associated with ↑ in factor: High IUCN Endangerment Status, Extinction Rate.

↑ in SSB associated with ↓ in factor: Diversification.


MCMCGLMM

SSB

# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "SSB"
if (USEGLOBAL) {
  set_global()
}
f <- SSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "SSB") {
  do_mcmc(f)
}
process_mcmc("SSB", "IUCN")

FSSB

# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "FSSB"
if (USEGLOBAL) {
  set_global()
}
f <- FSSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "FSSB") {
  do_mcmc(f)
}
process_mcmc("FSSB", "IUCN")

MSSB

# Runs MCMCglmm for all our IUCN variables.
ANALYSIS <- "MCMCGLMM"
HYPOTHESIS <- "IUCN"
SSBTYPE <- "MSSB"
if (USEGLOBAL) {
  set_global()
}
f <- MSSB ~ IUCN + 1
if (ANALYSIS == "MCMCGLMM" & HYPOTHESIS == "IUCN" & SSBTYPE == "MSSB") {
  do_mcmc(f)
}
process_mcmc("MSSB", "IUCN")

SSE

files <- Sys.glob(paste0("../output/*/*.log"))

sp_plot_list <- list()
ex_plot_list <- list()
ind <- 1

for (file in files) {
  clade <- strsplit(basename(file), "\\.")[[1]][1]
  log <- read.csv(file, sep = "\t")
  results <- log[c("speciation.1.", "speciation.2.", "speciation.3.", "speciation.4.", "extinction.1.", "extinction.2.", "extinction.3.", "extinction.4.")]

  plot_sp <- ggplot(results) +
    geom_density(aes(x = speciation.1., y = ..density..), bounds = c(0, 1), color = "blue") +
    geom_density(aes(x = speciation.2., y = ..density..), bounds = c(0, 1), color = "red") +
    geom_density(aes(x = speciation.3., y = ..density..), bounds = c(0, 1), color = "blue", lty = "dotted") +
    geom_density(aes(x = speciation.4., y = ..density..), bounds = c(0, 1), color = "red", lty = "dotted") +
    ggtitle(clade) +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title = element_blank(),
      legend.position = "none",
      aspect.ratio = 1
    )
  sp_plot_list[[ind]] <- plot_sp

  plot_ex <- ggplot(results) +
    geom_density(aes(x = extinction.1., y = ..density..), bounds = c(0, 1), color = "blue") +
    geom_density(aes(x = extinction.2., y = ..density..), bounds = c(0, 1), color = "red") +
    geom_density(aes(x = extinction.3., y = ..density..), bounds = c(0, 1), color = "blue", lty = "dotted") +
    geom_density(aes(x = extinction.4., y = ..density..), bounds = c(0, 1), color = "red", lty = "dotted") +
    ggtitle(clade) +
    theme_bw() +
    theme(
      plot.title = element_text(hjust = 0.5),
      axis.title = element_blank(),
      legend.position = "none",
      aspect.ratio = 1
    )
  ex_plot_list[[ind]] <- plot_ex

  ind <- ind + 1
}

big_plot_sp <- wrap_plots(sp_plot_list, ncol = 4)
ggsave(paste0("../figures/SSE_sp.png"), big_plot_sp, dpi = 600, width = 16, height = 16)

big_plot_ex <- wrap_plots(ex_plot_list, ncol = 4)
ggsave(paste0("../figures/SSE_ex.png"), big_plot_ex, dpi = 600, width = 16, height = 16)

Example Analyses

Correlations

This analysis determines whether factors are significantly correlated with SSB, either via a chi-square test (categorical factors) or a t-test (quantitative factors). These tests do not incorporate phylogeny and are only used as a preliminary check for whether each factor should be analyzed more deeply.

# This code chunk assesses how each independent variable correlates with SSB
# It prints out each statistical test, and also creates a file called "correlations.csv" that documents them
# Here we use results="hold" to tell R markdown to put all the output in one section

SSB <- data$SSB

tests <- data.frame(matrix(NA, ncol = 3, nrow = 0)) # This matrix will hold the results of our statistical tests for correlations
for (name in colnames(data)) { # We want to look at all the independent variables and check whether they are correlated with SSB
  if (name %in% categorical) { # Checking if this is a categorical variable
    ind <- data[[name]] # Getting the data from the column
    test <- suppressWarnings(chisq.test(x = SSB, y = ind)$p.value) # Performing a chi-square test of significance and getting the p-value from it
    if (test <= 0.05) { # Checks if the p-value is significant - if yes, do the next thing
      row <- c(name = name, p = test, keep = "Y") # Recording "Y" in the "keep" column if the test shows significance
    } else { # If the p-value was not significant, do this instead
      row <- c(name = name, p = test, keep = "N") # Recording "N" in the "keep" column if the test does not show significance
    }
    tests <- rbind(tests, row) # Adding the results of this test (for this variable) to the list of tests for all variables
    cat(paste0("\nVariable: ", gsub(".", " ", row[1], fixed = TRUE), " | p = ", round(as.numeric(row[2]), 4), " | Significant: ", row[3]), "\n") # Printing the statistical test being performed
    print(suppressWarnings(chisq.test(x = SSB, y = ind)$observed)) # Printing the details of this statistical test
  }
  if (name %in% quantitative) { # Checking if this is a quantitative variable
    ind <- data[[name]] # Getting the data from the column
    test <- suppressWarnings(t.test(ind ~ SSB, data = cbind(SSB, ind), na.rm = TRUE)$p.value) # Performing a t-test of significance and getting the p-value from it
    if (test <= 0.05) { # Checks if the p-value is significant - if yes, do the next thing
      row <- c(name = name, p = test, keep = "Y") # Recording "Y" in the "keep" column if the test shows significance
    } else { # If the p-value was not significant, do this instead
      row <- c(name = name, p = test, keep = "N") # Recording "N" in the "keep" column if the test does not show significance
    }
    tests <- rbind(tests, row) # Adding the results of this test (for this variable) to the list of tests for all variables
    cat(paste0("\nVariable: ", gsub(".", " ", row[1], fixed = TRUE), " | p = ", round(as.numeric(row[2]), 4), " | Significant: ", row[3]), "\n") # Printing the statistical test being performed
    print(suppressWarnings(t.test(ind ~ SSB, data = cbind(SSB, ind), na.rm = TRUE)$estimate)) # Printing the details of this statistical test
  }
}

colnames(tests) <- c("name", "p", "keep") # Naming the columns for our dataframe with all the statistical test results
write.csv(tests, "correlations.csv", quote = FALSE, sep = ",", row.names = FALSE, col.names = TRUE) # Saving our statistical tests to a file

Phylogenetic Signal

In this analysis, we are checking whether a particular binary trait shows phylogenetic signal. Essentially, do species which are more closely related show traits that are more similar? If d=1, this indicates that they do not (no phylogenetic signal) – basically, the trait is totally random on the tree. Otherwise, the trait shows phylogenetic signal. The test also shows whether the trait follows a Brownian model, where trait similarity is based on the amount of time species spend evolving together. If d=0, this indicates that the trait follows a Brownian model. If d>0 (but d<1), this indicates that species are more different than expected under a Brownian model, while d<0 indicates that they are more similar.

# This code chunk conducts a test for phylogenetic signal (d) using the "caper" package
# d=1 indicates complete randomness of trait (no phylogenetic signal), all rest have phylogenetic signal
# d=0 trait follows Brownian, d positive under 1 more different than Brownian, d negative more similar than Brownian

caper::phylo.d(data, phy, Species, SSB) # Tests for phylogenetic signal in SSB, shows more differentiation than expected under Brownian structure
caper::phylo.d(data, phy, Species, FSSB) # same analysis for female SSB. Positive D shows more differentiation between species than expected.
caper::phylo.d(data, phy, Species, MSSB) # same analysis for male SSB. Positive D shows more differentiation between species than expected.
caper::phylo.d(data, phy, Species, TSP) # Tests for phylogenetic signal in typically single progeny, shows Brownian structure

Transition Rates

In this analysis, we are estimating transition rates between different combinations of traits (in this case, combinations of FSSB and TSP) based on a model that we define. We decide which rates are allowed to be different from one another, and then estimate rates under that model. This is somewhat different from Pagel’s directional test, which compares multiple models (matrices of rate parameters) to see which one best describes the data. When we run Pagel’s directional test we are also doing this, so we don’t have to run this analysis separately.

# This code chunk does an ancestral character estimation (ACE) for SSB using the "ape" package
# It also determines the rates of transition between different characters / sets of characters
# We use a CTMC to determine relationship between different variable in a matrix

dataAFSSBmtrx <- matrix(c(0, 1, 2, 0), 2) # Forms matrix for future use by AFSSB
dataAFSSB <- ape::ace(data$SSB, phy, type = "discrete", model = dataAFSSBmtrx) # Reconstructed ancestral states of SSB

for (i in 1:nrow(data)) { # Create for loop for finding four states of dual relation between FSSB and TSP
  dataifFSSB <- data$SSB[i]
  dataifTSP <- data$TSP[i]
  if (dataifFSSB == 0) {
    if (dataifTSP == 0) {
      state <- 1 # no FSSB, no TSP
    } else {
      state <- 2 # no FSSB, yes TSP
    }
  } else {
    if (dataifTSP == 0) {
      state <- 3 # yes FSSB, no TSP
    } else {
      state <- 4 # yes FSSB, yes TSP
    }
  }
  data$FSSB_TSP[i] <- state # Takes new state info and adds it as a row to data named FSSB_TSP
}

dataAFSSB_TSPmtrx <- matrix(c(0, 1, 2, 0, 3, 0, 0, 4, 5, 0, 0, 6, 0, 7, 8, 0), 4, byrow = TRUE) # Forms matrix for use in FSSB_TSP rate estimates, manually adjusted 0 terms for things we determine impossible
dataAFSSB_TSP <- ape::ace(data$FSSB_TSP, phy, type = "discrete", model = dataAFSSB_TSPmtrx) # Generates rate matrix for AFSSB_TSP
print(dataAFSSB_TSP)

In this analysis, we are comparing multiple models (matrices of rate parameters) to see which one best describes the data. We will compare 4 models:

  1. SSB and TSP evolve independently of one another
  2. SSB depends on TSP
  3. TSP depends on SSB
  4. Both SSB and TSP depend on one another
# This code chunk does pagel's directional test
# Pagel's directional test: http://www.phytools.org/Cordoba2017/ex/9/Pagel94-method.html
# These take a long time to run, so don't be worried if results don't print immediately

if (RERUN) {
  FSSB <- setNames(data$FSSB, data$Species) # Naming our FSSB values with species names
  TSP <- setNames(data$TSP, data$Species) # Naming our TSP values with species names

  fit_FSSB_TSP <- phytools::fitPagel(phy, FSSB, TSP) # Fitting models where FSSB and TSP are totally independent or dependent
  fit_FSSB_TSP
  plot(fit_FSSB_TSP, lwd.by.rate = TRUE)

  fit_FSSB <- phytools::fitPagel(phy, FSSB, TSP, dep.var = "x") # Fitting models where FSSB depends on TSP
  fit_FSSB
  plot(fit_FSSB, lwd.by.rate = TRUE)

  fit_TSP <- phytools::fitPagel(phy, FSSB, TSP, dep.var = "y") # Fitting models where TSP depends on FSSB
  fit_TSP
  plot(fit_TSP, lwd.by.rate = TRUE)

  # Comparing the goodness of all 4 models using AIC
  aic <- setNames(
    c(
      fit_FSSB_TSP$independent.AIC,
      fit_FSSB$dependent.AIC,
      fit_TSP$dependent.AIC,
      fit_FSSB_TSP$dependent.AIC
    ),
    c("independent", "dependent FSSB", "dependent TSP", "dependent FSSB & TSP")
  )
  aic
  aic.w(aic)
}

MCMCglmm

This analysis determines how a collection of independent variables contribute to a dependent variable (in this case, SSB). It creates a function where SSB is a function of several variables, and the coefficients of those variables tell us whether the variable is significantly related to SSB. If the quantiles for a particular coefficient exclude 0, then the variable is significant. The sign of the coefficient tells us the direction of the relationship (positive = increased SSB, negative = decreased SSB).

# This code chunk runs an MCMCglmm analysis

if (RERUN) {
  data_subset <- data[, c("Species", "TSP", "SSB", "FSSB", "MSSB")]
  data_subset <- data_subset[complete.cases(data_subset), ] # Removes incomplete cases (NAs)
  print(data_subset) # Prints that data

  # Reads a tree and makes sure all tips are exactly at the present (ultrametric)
  # Then creates a matrix for how much time species pairs have spent evolving together vs separately
  inv.phylo <- inverseA(phy, "TIPS")$Ainv

  # Makes these things called priors, which are prior assumptions on variance distribution for the following model
  prior <- list(
    G = list(G1 = list(V = 1, nu = 0.02)), # This is the prior on random effects, i.e. Species identity -- small nu means mostly flat
    R = list(V = 1, nu = 0.02)
  ) # This is the prior on your response variable, i.e. SSB -- small nu means mostly flat

  # Runs MCMCglmm fancy linear regression analysis
  MCMCanalysis <- MCMCglmm::MCMCglmm(SSB ~ TSP + 1, # Formula for analysis: SSB is related to TSP, PC, and some intercept
    random = ~Species, # Random effect -- species identity is a confounding factor in the analysis
    family = "categorical", # Our response variable is categorical
    ginverse = list(Species = inv.phylo), # These are our phylogenetic relationships, which cause covariance structure
    prior = prior, # Telling the MCMC what our priors are
    data = data_subset, # Telling the MCMC what our data is
    nitt = 1000, # The number of iterations, i.e. samples, should do more for a real analysis
    burnin = 100, # How long the MCMC spends "optimizing" before real samples
    thin = 1, # How often to print output (every 1)
    verbose = FALSE
  ) # Don't write out too much stuff

  print(summary(MCMCanalysis$Sol))
}

SSE

This analyses determines whether a particular binary trait (in this case, SSB) is associated with different rates of speciation and extinction. This is a simple model that only uses one binary trait, but better models also have “hidden” states. These hidden states are not observed traits of the organisms, but allow rates to be different on different parts of the tree due to unobserved factors. SSE analyses (and other phylogenetic analyses) are sensitive to the sampling fraction, i.e. the number of taxa in the dataset compared to how many species are actually in the group. We should do some research to determine whether there are appropriate ways to handle this.

# This code chunk runs a bisse analysis on just the taxa included in our dataset
# We don't want this to run every time we knit -- only when we manually want to run it
# To prevent us from running accidentally, we have RERUN <- FALSE at the start of the script
# We have to manually type RERUN <- TRUE for this to run
# We will save the MCMC to a file and have a separate code chunk to print a summary

if (RERUN) {
  states <- as.numeric(as.character(data$SSB)) # We need to make our SSB into a number instead of a factor for this analysis
  names(states) <- data$Species # diversitree requires us to name our states with species names
  lik <- diversitree::make.bisse(phy, states) # This is our bisse model, with our tree and tip states
  pars <- c(0.1, 0.1, 0.01, 0.01, 0.01, 0.01) # lambda 0 and 1 (speciation), mu 0 and 1 (extinction), q 01 and 10 (transition) initial values
  n_iter <- 100 # The number of MCMC iterations, i.e. samples (will use more for a real analysis)
  burnin <- 20 # The number of burnin iterations, i.e. how long we spend "optimizing" before real samples

  # Our MCMC analysis
  # Our tuning parameter (for MCMC proposals) is 0.1
  # We want to print to screen every 10 iterations
  MCMCanalysis <- diversitree::mcmc(lik, pars, nsteps = n_iter, w = 0.1, print.every = 10)
  MCMCresults <- MCMCanalysis[burnin:n_iter, ] # Removing burnin iterations
  write.table(MCMCresults, "bisse.tsv", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) # Saving our MCMC output to a file

  RERUN <- FALSE
}
MCMCresults <- read.csv("bisse.tsv", sep = "\t") # Retrieving our MCMC results from the bisse.csv file
MCMCsummary <- MCMCvis::MCMCsummary(coda::mcmc(MCMCresults), excl = c("i", "p"), Rhat = FALSE, round = 4) # Turning into MCMC object and summarizing output (excluding iteration and probability)
print(MCMCsummary)